import requests
import pandas as pd
import pickle
import os
from pathlib import Path
import plotly.express as px
import plotly.graph_objects as go
import time
import numpy as np
import plotly.io as pio
from tqdm import tqdm
pio.renderers.default='notebook'
df_sp500 = pd.read_csv('sp500_tickers.csv')
df_sp500
| Symbol | Description | |
|---|---|---|
| 0 | BAC | Bank of America Corp |
| 1 | AAPL | Apple Inc |
| 2 | MSFT | Microsoft Corp |
| 3 | GOOG | Alphabet Inc Class C |
| 4 | GOOGL | Alphabet Inc Class A |
| ... | ... | ... |
| 499 | ALK | Alaska Air Group Inc |
| 500 | PENN | Penn National Gaming Inc |
| 501 | IPGP | IPG Photonics Corp |
| 502 | PVH | PVH Corp |
| 503 | NCLH | Norwegian Cruise Line Holdings Ltd |
504 rows × 2 columns
tickers = df_sp500['Symbol'].values
earnings_data = {}
for i in tqdm(range(len(tickers))):
t = tickers[i]
time.sleep(.85)
url = 'https://www.alphavantage.co/query?function=EARNINGS&symbol='+t+'&apikey='+'296ULICGSB63VL7A'
r = requests.get(url)
data = r.json()
earnings_data[t] = data
with open('sp500_earnings.pickle', 'wb') as f:
pickle.dump(earnings_data, f)
100%|██████████| 504/504 [09:49<00:00, 1.17s/it]
tickers = df_sp500['Symbol'].values
close_data = {}
for i in tqdm(range(len(tickers))):
t = tickers[i]
time.sleep(.85)
url = 'https://www.alphavantage.co/query?function=TIME_SERIES_MONTHLY&symbol='+t+'&apikey='+'296ULICGSB63VL7A'
r = requests.get(url)
data = r.json()
close_data[t] = data
with open('sp500_close.pickle', 'wb') as f:
pickle.dump(close_data, f)
100%|██████████| 504/504 [11:16<00:00, 1.34s/it]
# open sp500 earnings
sp500_earnings_fileo = open('sp500_earnings.pickle', 'rb')
ern_datao = pickle.load(sp500_earnings_fileo)
print(len(ern_datao.keys()))
# open sp500 close
sp500_close_fileo = open('sp500_close.pickle', 'rb')
close_datao = pickle.load(sp500_close_fileo)
print(len(close_datao.keys()))
504 504
close_data_keys = list(close_datao.keys())
pe_df_list = []
for t in close_data_keys:
if 'Monthly Time Series' in close_datao[t] and 'annualEarnings' in ern_datao[t]:
close_df = pd.DataFrame.from_dict(close_datao[t]['Monthly Time Series']).T
close_df.index = pd.to_datetime(close_df.index)
ern_df = pd.DataFrame.from_dict(ern_datao[t]['annualEarnings'])
ern_df['fiscalDateEnding'] = pd.to_datetime(ern_df['fiscalDateEnding'])
ern_df = ern_df.set_index(['fiscalDateEnding'])
ern_df.index = ern_df.index.year
close_df = close_df.astype({'4. close':'float'})
close_df = close_df['4. close']
summary_close_df = close_df.groupby(close_df.index.year).mean()
summary_close_df = summary_close_df[~summary_close_df.index.duplicated(keep='first')]
ern_df = ern_df[~ern_df.index.duplicated(keep='first')]
pe_df = pd.concat([summary_close_df,ern_df],axis=1).reset_index()
pe_df = pe_df.astype({'reportedEPS':'float'})
# drop zero value rows so we don't get inf
pe_df = pe_df[pe_df['reportedEPS']!=0]
pe_df['price_to_earnings'] = pe_df['4. close'] / pe_df['reportedEPS']
pe_df['Symbol'] = t
pe_df_list.append(pe_df)
else:
print('no data')
pe_df = pd.concat(pe_df_list)
pe_df = pe_df.rename(columns={'index':'year','4. close':'close','reportedEPS':'eps','price_to_earnings':'pe','Symbol':'symbol'})
pe_df = pe_df.dropna()
# drop any 2022 years because they will have a quarterly eps which inflates PE
pe_df = pe_df[pe_df['year'] != 2022]
no data no data no data
pe_df.max()
year 2021 close 4980.209167 eps 321.19 pe 13417.316017 symbol ZTS dtype: object
pe_dist = px.histogram(pe_df,
x='pe',
marginal="box",
nbins=60)
pe_dist.show()
std = pe_df['pe'].std()
print('PE standard deviation is ', std)
# drop everything over two standard deviations - we'll call these outliers
pe_df = pe_df[pe_df['pe'] < (std*2)]
pe_df = pe_df[pe_df['pe'] > (std*-2)]
PE standard deviation is 375.6009220441895
pe_dist = px.histogram(pe_df,
x='pe',
marginal="box",
nbins=60)
pe_dist.show()
# drop any 2020 years because covid through off pe
pe_df = pe_df[pe_df['year'] != 2020]
# graph some of the top names by pe
sp_top_df = pe_df[pe_df['symbol'].isin(['AAPL','MSFT','BRK.B','GOOG','XOM','CVX','PG','UNH','JPM','V','HD','BAC','KO','PFE','ADBE','FB'])]
mean_df = pe_df.groupby('year')['pe'].mean()
mean_lst = [mean_df.mean()]*mean_df.size
mean_lst_top = sp_top_df.groupby('year')['pe'].mean().mean()
pe_dist = px.line(sp_top_df,
x='year',
y='pe',
color='symbol')
pe_dist.add_trace(go.Scatter(
x=mean_df.index,
y=mean_df,
mode='lines',
name='SP 500 Yearly Mean PE',
line=dict(color='gray',width=3,dash='dot')
))
pe_dist.add_trace(go.Scatter(x=mean_df.index,
y=mean_lst,
mode='lines',
name='SP 500 Mean PE',
line=dict(color='black',width=1)
))
pe_dist.show()
print('20 year PE mean for all sp500 stocks',np.round(mean_df.mean()))
print('20 year PE mean for a basket of top sp500 stocks',np.round(mean_lst_top))
20 year PE mean for all sp500 stocks 31.0 20 year PE mean for a basket of top sp500 stocks 46.0